Endowment Time Series
# load companies file of EIN to name and endowment data
companies_to_ein <- read_csv(here("data", "companies.csv")) %>%
mutate(EIN = as.character(ein)) %>%
select(EIN, organization_name)
endowment_data <- read_rds(here("data",
"endowments_by_most_recent_filings.RDS")) %>%
select(-c(EndowmentsHeldUnrelatedOrgInd, EndowmentsHeldRelatedOrgInd)) %>%
pivot_longer(-c(EIN, fiscal_year),
names_to = "variable_name") %>%
left_join(companies_to_ein) %>%
mutate(fiscal_year=as.numeric(paste(fiscal_year)))# extract return dates
source(here("GET_VARS.R"))
files <- dir(here("ballet_990_released_20230208"),
full.names = TRUE)
dates <- map_df(files,
~get_df(filename = .x,
variables = c("//Return//ReturnHeader//TaxPeriodEndDt"))) %>%
mutate(fiscal_year = as.numeric(paste(fiscal_year))) %>%
filter_ein()
saveRDS(dates, here('data', 'dates.RDS')) dates <- readRDS( here('data', 'dates.RDS')) %>%
select(EIN, TaxPeriodEndDt, fiscal_year)
endowment_data <- endowment_data %>%
mutate(fiscal_year=as.numeric(paste(fiscal_year))) %>%
left_join(dates)# function to plot variables of interest against each other
plot_ranks <- function(var1, var2, data) {
plt <- data %>%
group_by(fiscal_year) %>%
# arrange(var1) %>%
mutate("{var1}_rank" := rank(!!sym(var1))) %>%
# arrange(var2) %>%
mutate("{var2}_rank" := rank(!!sym(var2))) %>%
ggplot(aes(x = !!sym(glue("{var1}_rank" )), y =!!sym(glue("{var2}_rank" )),
color = organization_name,
label =EIN
)) +
geom_point() +
geom_function(fun=function(x)x,color="darkred", alpha = .8) +
labs(x = paste0(var1, " Rank"),
y = paste0(var2, " Rank")) +
theme_bw() +
labs(title = glue("Rank of {var2} vs. Rank of {var1}")) +
viridis::scale_color_viridis(discrete=TRUE,
option = "rocket",
end = .9) +
facet_wrap(~fiscal_year)+
theme(plot.title = element_text(size = 14,
hjust = .5, face="bold",),
plot.subtitle = element_text(hjust = .5,
face="italic",
size = 14),
axis.title = element_text(size = 13,
face = "bold"))
ggplotly(plt, margin = m, height = 550)
}
# function to plot variables of interest against each other
plot_combo <- function(var1, var2, data) {
data %>%
ggplot(aes(x = !!sym(var1), y = !!sym(var2), color = EIN)) +
geom_point(alpha = .9) +
# geom_line(alpha = .5) +
facet_wrap(~fiscal_year) +
viridis::scale_color_viridis(discrete=TRUE,
option = "rocket",
end = .9) +
theme_bw()+
theme(plot.title = element_text(size = 18,
hjust = .5, face="bold",),
plot.subtitle = element_text(hjust = .5,
face="italic",
size = 16),
axis.title = element_text(size = 13,
face = "bold"),
legend.position = "none",
axis.text.x = element_text(angle = 60, vjust = .6)) +
scale_x_continuous(labels=comma) +
scale_y_continuous(labels=comma) +
labs(title = paste0(var2, " vs. ", var1),
subtitle = "Fill by EIN")
}
endowment_data_wide <- endowment_data %>%
pivot_wider(names_from=variable_name,
values_from=value) Plotting Endowment Variables Against Each Other, By Year
vars <- unique(endowment_data$variable_name)[!grepl("EOY|Admin|Grants", unique(endowment_data$variable_name))]
# pairwise combinations of variables
variable_combinations <- t(combn(vars, 2)) %>%
as.data.frame()
if (!all_plots) variable_combinations <- variable_combinations[1:4,]cat('## Scale of Original Variables {.tabset} \n\n')Scale of Original Variables
pwalk(variable_combinations, ~{
cat('### ',paste0(.x, ", ", .y),'\n\n')
plt <- plot_combo(var1 = .x, var2 = .y, data = endowment_data_wide)
print(plt)
cat('\n\n')
}
)BeginningYearBalanceAmt, ContributionsAmt
BeginningYearBalanceAmt, InvestmentEarningsOrLossesAmt
BeginningYearBalanceAmt, OtherExpendituresAmt
BeginningYearBalanceAmt, EndYearBalanceAmt
By Rank
plotlist <- pmap(variable_combinations, ~{
plt <- plot_ranks(var1 = .x, var2 = .y, data = endowment_data_wide)
}
)
htmltools::tagList(setNames(plotlist, NULL))Compensation
m <- list(
l = 50,
r = 50,
b = 50,
t = 150,
pad = 0.5
)
source(here("GET_VARS.R"))
files <- dir(here("ballet_990_released_20230208"),
full.names = TRUE)
##################################
# EMPLOYEE INFORMATION
##################################
employee_comp_vars <- c(
"/Return/ReturnData/IRS990/TotalEmployeeCnt",
"/Return/ReturnData/IRS990/EmployeeCnt",
"/Return/ReturnData/IRS990/CYSalariesCompEmpBnftPaidAmt",
"/Return/ReturnData/IRS990/CompCurrentOfcrDirectorsGrp/TotalAmt")
employees <- map_df(files, ~get_df(filename = .x,
variables=employee_comp_vars))
employees %>%
select(-filename) %>%
mutate(across(-c(ReturnTs,EIN, fiscal_year),
as.numeric)) %>%
rename(OffDirCompAmt =TotalAmt ) %>%
saveRDS(here("data", "employees.RDS"))
#################
# SCHEDULE J
#################
comp <- map_df(files, ~get_df(filename = .x, schedule = "j"))
comp_clean <- comp %>%
rename_with(.cols= everything(),
~gsub('/Return/ReturnData/IRS990ScheduleJ/', '', .)) %>%
select(-contains("Ind")) %>%
select(fiscal_year, EIN,
contains("RltdOrgOfficerTrstKeyEmplGrp")) %>%
# only extract cols within the RltdOrgOfficerTrstKeyEmplGrp
select(EIN, fiscal_year,
matches("RltdOrgOfficerTrstKeyEmplGrp\\[.*.\\]/")) %>%
pivot_longer(-c(EIN,fiscal_year)) %>%
mutate(id = gsub("\\D", "", name),
# name_old = name,
name = gsub(".*./", "", name),
id = gsub("990", "", id))
comp_clean <- comp_clean %>%
filter(!is.na(value)) %>%
distinct() %>%
pivot_wider(names_from = name, values_from = value)
comp_clean <- comp_clean %>%
mutate(across(contains("Amt"), as.numeric))%>%
mutate(TitleTxt=tolower(TitleTxt))
saveRDS(comp_clean, here("data", "schedj.RDS"))comp_clean <- read_rds(here("data", "schedj.RDS"))%>%
left_join(companies_to_ein) %>%
mutate(fiscal_year = as.numeric(paste(fiscal_year)))
employees <- readRDS(here("data", "employees.RDS")) %>%
mutate(fiscal_year = as.numeric(paste(fiscal_year)))# clean up title text field because it was free text in the form 990
comp_clean <- comp_clean %>%
mutate(TitleTxt = gsub("dancer/choreographer",
"dancer / choreographer",
TitleTxt),
TitleTxt = gsub("vp", "Vice President", TitleTxt),
TitleTxt = gsub("dorector", "director",TitleTxt),
title_clean = case_when(
grepl("ceo", TitleTxt, ignore.case = TRUE ) ~"CEO",
grepl("cfo", TitleTxt, ignore.case = TRUE)~ "Chief Financial Officer",
grepl("executive dir", TitleTxt, ignore.case = TRUE) ~"Executive Director",
grepl("artistic dir",TitleTxt, ignore.case = TRUE) ~"Artistic Director",
grepl("emeritus|emerita", TitleTxt, ignore.case = TRUE) ~"Emirita/Emiritus Position",
grepl( "chief dev",TitleTxt, ignore.case=TRUE) &
grepl("officer",TitleTxt, ignore.case = TRUE) ~"Chief Development Officer",
grepl("director of market|marketing director",TitleTxt, ignore.case = TRUE) ~ "Director of Marketing",
grepl("music director",TitleTxt, ignore.case = TRUE) ~"Music Director",
grepl("mktg", TitleTxt, ignore.case = TRUE ) &
grepl("officer|ofc", TitleTxt, ignore.case = TRUE ) ~ "Marketing Officer",
grepl("Director of Development",TitleTxt, ignore.case = TRUE) ~ "Director of Development",
grepl("chief",TitleTxt, ignore.case = TRUE) &
grepl("officer",TitleTxt, ignore.case = TRUE) ~ "Other Chief Officer",
grepl("Dir of Legal",TitleTxt, ignore.case = TRUE) ~"Director of Legal Affairs",
grepl("Former Senior Dir", TitleTxt, ignore.case = TRUE) ~ "Former Senior Director",
grepl("Director|Dir", TitleTxt, ignore.case = TRUE) ~ "Other Director",
grepl("Director", TitleTxt, ignore.case = TRUE) ~ "Other Director",
TRUE ~ TitleTxt
)) Number of EINs with Each Title
# number of EINs with each type of title
comp_clean %>%
group_by(title_clean) %>%
summarize(`Number of EINs` = n_distinct(EIN)) %>%
arrange(desc(`Number of EINs`))Number of Individuals with Title
# number of individuals with title
comp_clean %>%
mutate(title_clean=tolower(title_clean)) %>%
filter(!is.na(title_clean)) %>%
group_by(title_clean) %>%
summarize(`Number of Individuals in Position` = n()) %>%
arrange(desc(`Number of Individuals in Position`))# missingness by variable
# comp_clean %>%
# select(-c(EIN,fiscal_year,id)) %>%
# is.na() %>%
# colSums() %>%
# as_tibble(rownames="Variable") %>%
# mutate(`Not Missing` = nrow(comp_clean) - value) %>%
# select(-value)Compensation by Title
Base Compensation
comp_clean %>%
group_by(title_clean) %>%
mutate(m = median(BaseCompensationFilingOrgAmt, na.rm= TRUE)) %>%
filter(!is.na(title_clean)) %>%
ungroup() %>%
ggplot(aes(x=fct_reorder(title_clean,m),
y = BaseCompensationFilingOrgAmt)) +
geom_jitter(alpha = .5, size = .5, height = 0, width = .05) +
coord_flip() +
theme_bw() +
labs(title = "Base Compensation by Title",
x = "Title")+
theme(plot.title = element_text(size = 18,
hjust = .5, face="bold",),
plot.subtitle = element_text(hjust = .5,
face="italic",
size = 16),
axis.title = element_text(size = 13,
face = "bold"),
axis.text.x= element_text(size = 8))Total Compensation
comp_clean %>%
group_by(title_clean) %>%
mutate(m = median(TotalCompensationFilingOrgAmt, na.rm= TRUE)) %>%
filter(!is.na(title_clean)) %>%
ungroup() %>%
ggplot(aes(x=fct_reorder(title_clean,m),
y = TotalCompensationFilingOrgAmt)) +
geom_jitter(alpha = .5, size = .5, height = 0, width = .05) +
coord_flip() +
theme_bw() +
labs(title = "Total Compensation by Title",
x = "Title")+
theme(plot.title = element_text(size = 18,
hjust = .5, face="bold",),
plot.subtitle = element_text(hjust = .5,
face="italic",
size = 16),
axis.title = element_text(size = 13,
face = "bold"),
axis.text.x= element_text(size = 8))Compensation by Year
plt <- comp_clean %>%
group_by(EIN, fiscal_year) %>%
summarize(total_compensation = sum(BaseCompensationFilingOrgAmt)) %>%
group_by(EIN) %>%
mutate(m = median(total_compensation, na.rm= TRUE)) %>%
ungroup() %>%
# group_by(EIN) %>%
mutate(tile = ntile(m,2),
tilename = ifelse(tile == 1,
"EINs Below the Median",
"EINs Above the Median"),
tilename = factor(tilename, levels = c( "EINs Below the Median",
"EINs Above the Median"))) %>%
ggplot(aes(x=fiscal_year,
y = total_compensation,
color = EIN,
group = EIN)) +
geom_line() +
geom_point() +
labs(title = "Compensation to Highest Paid Employees",
subtitle = "Total Base Compensation to Highest Paid Employees By EIN",
y = "Total Compensation",
x = "Fiscal Year")+
viridis::scale_color_viridis(discrete=TRUE,
option = "rocket",
end = .9) +
theme_bw()+
theme(plot.title = element_text(size = 18,
hjust = .5, face="bold",),
plot.subtitle = element_text(hjust = .5,
face="italic",
size = 16),
axis.title = element_text(size = 13,
face = "bold"),
axis.text.x = element_text(size =8, vjust = .6, angle = 60)) +
facet_wrap(~tilename, scales = "free_y") +
scale_y_continuous(labels = comma) +
scale_x_continuous(labels = comma)
ggplotly(plt,height = 500, width =850) %>%
layout(margin = m)# plot compensation versus beginning year balance by fiscal year
comp <- comp_clean %>%
mutate(fiscal_year = as.numeric(paste(fiscal_year))) %>%
left_join(endowment_data_wide) %>%
group_by(EIN, fiscal_year, BeginningYearBalanceAmt, organization_name) %>%
summarize(total_compensation = sum(BaseCompensationFilingOrgAmt))
plt <- comp %>%
ggplot(aes(x=BeginningYearBalanceAmt,
y = total_compensation,
label = organization_name,
color = EIN)) +
geom_point() +
facet_wrap(~fiscal_year, nrow = 2)+
theme_bw()+
theme(plot.title = element_text(size = 18,
hjust = .5, face="bold",),
plot.subtitle = element_text(hjust = .5,
face="italic",
size = 16),
axis.title = element_text(size = 13,
face = "bold"))+
viridis::scale_color_viridis(discrete=TRUE,
option = "magma",
end = .9) +
labs(title = "Total Base Compensation to Highest Paid Employees\nby Beginning of Year Balance",
x = "Beginning of Year Balance",
y = "Total Compensation")
ggplotly(plt, height = 500, width = 850) %>%
layout(margin = m)# logged scales
plt <- comp %>%
ggplot(aes(x=BeginningYearBalanceAmt,
y = total_compensation,
color = EIN)) +
geom_point() +
facet_wrap(~fiscal_year, nrow = 2)+
theme_bw()+
theme(plot.title = element_text(size = 18,
hjust = .5, face="bold",),
plot.subtitle = element_text(hjust = .5,
face="italic",
size = 16),
axis.title = element_text(size = 13,
face = "bold"))+
viridis::scale_color_viridis(discrete=TRUE,
option = "magma",
end = .9) +
scale_x_log10() +
scale_y_log10() +
labs(title = "Total Base Compensation to Highest Paid Employees\nby Beginning of Year Balance",
subtitle = "Both Axes on Log Scale"
x = "Beginning of Year Balance",
y = "Total Compensation")
ggplotly(plt, height = 500, width = 850) %>%
layout(margin = m)Ranking of Beginning of Year Balance Compared to Ranking of C-Suite Compensation
plot_ranks("BeginningYearBalanceAmt",
"total_compensation", data = comp )Top Employees Compensation Compared to Total Compensation
- For total employee compensation -
CYSalariesCompEmpBnftPaidAmt: Salaries, other compensation, employee benefits (Part IX, column (A), lines 5–10). - For top employee compensation - Schedule J, looking at all compensation except deferred
top <- comp_clean %>%
group_by(EIN,fiscal_year,organization_name) %>%
mutate(not_deferred = TotalCompensationFilingOrgAmt -DeferredCompensationFlngOrgAmt) %>%
summarize(num_top_employees = n(),
compensation_top_total = sum(TotalCompensationFilingOrgAmt),
compensation_top_base = sum(BaseCompensationFilingOrgAmt),
compensation_top_not_def = sum(not_deferred)) %>%
ungroup()
emp_comp <- employees %>%
left_join(top)
plt <- emp_comp %>%
ggplot(aes(x=fiscal_year,
y = compensation_top_total/CYSalariesCompEmpBnftPaidAmt,
color = organization_name,
group= EIN)) +
geom_point() +
geom_line()+
viridis::scale_color_viridis(discrete=TRUE,
option = "rocket",
end = .9) +
theme_bw() +
labs(y="Fraction of Total Compensation Paid",
title = "Fraction of Total Compensation Paid to C-Suite Employees",
x="Fiscal Year") +
scale_y_continuous(n.breaks = 6)+
theme(plot.title = element_text(size = 14,
hjust = .5, face="bold",),
plot.subtitle = element_text(hjust = .5,
face="italic",
size = 12),
axis.title = element_text(size = 13,
face = "bold"))
ggplotly(plt, margin = m, height = 500, width =850) plt <- emp_comp %>%
filter(fiscal_year !=2014 & fiscal_year !=2021) %>%
ggplot(aes(y=CYSalariesCompEmpBnftPaidAmt/TotalEmployeeCnt,
x = compensation_top_base/num_top_employees,
label=fiscal_year,
color =organization_name,
group = EIN)) +
geom_point() +
theme_bw() +
viridis::scale_color_viridis(discrete=TRUE,
option = "rocket",
end = .9) +
theme(plot.title = element_text(size = 14,
hjust = .5, face="bold",),
plot.subtitle = element_text(hjust = .5,
face="italic",
size = 14),
axis.title = element_text(size = 13,
face = "bold")) +
scale_y_continuous(labels =comma) +
labs(y = "Average Employee Compensation",
x = "Average C-Suite Compensation",
title = "Average C-Suite Pay versus Overall Average Employee Compensation") +
facet_wrap(~fiscal_year)
ggplotly(plt, margin = m,height = 500, width =850 ) plt <- emp_comp %>%
mutate(num_not_top = TotalEmployeeCnt - num_top_employees,
compensation_not_top = CYSalariesCompEmpBnftPaidAmt - compensation_top_total,
avg_not_top = compensation_not_top/num_not_top ,
avg_top = compensation_top_total / num_top_employees) %>%
filter(fiscal_year !=2014 & fiscal_year !=2021) %>%
# something strange with Aspen Santa Fe
filter(avg_not_top > 0) %>%
ggplot(aes(y=avg_not_top,
x =avg_top,
label=fiscal_year,
color =organization_name,
group = EIN)) +
geom_point() +
theme_bw() +
viridis::scale_color_viridis(discrete=TRUE,
option = "rocket",
end = .9) +
theme(plot.title = element_text(size = 12,
hjust = .5, face="bold",
margin = margin(5,5,5,5)),
plot.subtitle = element_text(hjust = .5,
face="italic",
size = 14),
axis.title = element_text(size = 13,
face = "bold"),
axis.text.x = element_text(angle = 20, vjust = .6)) +
scale_y_continuous(labels =comma) +
labs(y = "Average Employee Compensation",
x = "Average C-Suite Compensation",
title = "Average C-Suite Pay versus Average Employee Compensation (Not Including C-Suite)") +
facet_wrap(~fiscal_year) +
scale_x_continuous(labels = comma)
marg <- list(
l = 50,
r = 50,
b = 250,
t = 250,
pad = 0.5
)
ggplotly(plt, margin = marg, height = 500, width =850)Comparison to Financial Data: S&P 500
sp <- read_csv(here('data','SP500.csv')) %>%
rename_with(tolower) %>%
mutate(sp500 = as.numeric(sp500)) %>%
filter(!is.na(sp500)) %>%
mutate(month = month(date),
year = year(date)) %>%
group_by(month,year) %>%
arrange(month) %>%
slice_min(n=1,order_by = date) %>%
ungroup()
sp <- read_csv(here('data','SP500.csv')) %>%
rename_with(tolower) %>%
mutate(sp500 = as.numeric(sp500)) %>%
filter(!is.na(sp500)) %>%
mutate(month = month(date),
year = year(date)) %>%
group_by(month,year) %>%
summarize(sp500 = mean(sp500, na.rm=TRUE))
endowment_sp <- endowment_data %>%
mutate(month = month(TaxPeriodEndDt),
year= year(TaxPeriodEndDt)) %>%
left_join(sp)
plt <- endowment_sp %>%
filter(variable_name %in% c("BeginningYearBalanceAmt")) %>%
group_by(EIN) %>%
mutate(m = median(value, na.rm= TRUE)) %>%
ungroup() %>%
# group_by(EIN) %>%
mutate(tile = ntile(m,2),
tilename = ifelse(tile == 1,
"EINs Below the Median",
"EINs Above the Median"))%>%
# only EINS where all observations are NA will be dropped
filter(!is.na(tilename)) %>%
mutate(date = as_date(paste0(month, "-", year), format = "%m-%Y")) %>%
mutate(normalized = value/sp500,
not = value) %>%
select(date, normalized, not,variable_name, organization_name, tilename) %>%
pivot_longer(c(normalized,not)) %>%
ggplot(aes(x=date, y = value, color = organization_name)) +
geom_point() +
geom_line() +
facet_wrap(~tilename+name, scales="free", ncol = 2)+
labs(title = "Normalizing by S&P 500") +
theme_bw() +
viridis::scale_color_viridis(discrete=TRUE,
option = "rocket",
end = .9) +
theme(plot.title = element_text(size = 14,
hjust = .5, face="bold",),
plot.subtitle = element_text(hjust = .5,
face="italic",
size = 14),
axis.title = element_text(size = 13,
face = "bold"),
strip.text = element_text(margin = margin(10,5,5,5,"pt")))
ggplotly(plt, margin = m, height = 500, width = 850)